# Requires: pip install "unstructured[html]"
# Example: html_loader.py
from langchain_community.document_loaders import UnstructuredHTMLLoader

# Path of the HTML document to load (relative to the current working directory).
file_path = "llm_langchain_data/content.html"

# Extra keyword arguments given to the loader are forwarded to unstructured's
# HTML partitioner, whose parameter is named `encoding` (singular). The former
# `encodings` spelling did not match that parameter, so the requested encoding
# was never applied when reading the file.
loader = UnstructuredHTMLLoader(file_path, encoding="UTF-8")

# Parse the file and print the resulting documents for inspection.
data = loader.load()
print(data)